home *** CD-ROM | disk | FTP | other *** search
- /*
- C* Main routine
-
- source: cstar.c
- started: October 7, 1985
- version: see below
-
- PUBLIC DOMAIN SOFTWARE
-
- The CSTAR program was placed in the public domain on June 15, 1991,
- by its author and sole owner,
-
- Edward K. Ream
- 1617 Monroe Street
- Madison, WI 53711
- (608) 257-0802
-
- CSTAR may be used for any commercial or non-commercial purpose.
-
- DISCLAIMER OF WARRANTIES
-
- Edward K. Ream (Ream) specifically disclaims all warranties,
- expressed or implied, with respect to this computer software,
- including but not limited to implied warranties of merchantability
- and fitness for a particular purpose. In no event shall Ream be
- liable for any loss of profit or any commercial damage, including
- but not limited to special, incidental consequential or other damages.
- */
- #include "cstar.h"
- /* Sign-on banner; main() prints it on every run before looking at the arguments. */
- #define SIGNON "C Star: June 25, 1991\n"
- /* Usage line; the SHERLOCK build names the program csdb and shows the ++/-- tracing argument. */
- #ifdef SHERLOCK
- #define USAGE "usage: csdb [++--routine] [options] in out\n"
- #else
- #define USAGE "usage: cs [options] in out\n"
- #endif
- /* Help text, one line per option; main() prints USAGE and U1..U13 when fewer than two arguments are given. */
- #define U1 "-d id=value Define a preprocessor constant\n"
- #define U2 "-n Allow nested C comments\n"
- #define U3 "-s path Look for include files in path\n"
- #define U4 "-u id Undefine a preprocessor constant\n"
- #define U5 "-? Print the version number and exit\n\n"
- /* Options that suppress a compiler phase. */
- #define U6 "-noparse Suppress parsing\n"
- #define U7 "-nomacro Suppress macro expansion\n"
- #define U8 "-nogen Suppress code gen\n"
- #define U9 "-nopeep Suppress peephole\n\n"
- /* Options that dump intermediate results. */
- #define U10 "-tokens Output tokens\n"
- #define U11 "-tree Output parse tree\n"
- #define U12 "-code1 Output code list before peephole\n"
- #define U13 "-code2 Output code list after peephole\n"
- /* Forward declaration: t_init() is defined after main() in this file. */
- static void t_init();
-
- main(argc, argv)
- int argc;
- char **argv;
- {
- char *in, *out, *arg;
- char *def;
- char *p1;
- struct u_node *p;
- int out_length;
-
- /* char *mg_alloc(); */
-
- /*
- WARNING!! Do not change the order of initializing
- the modules without CAREFUL thought.
- */
- SL_INIT();
- mm_init();
- sysinit();
- SL_PARSE(argc, argv, "++", "--");
-
- /*
- WARNING: Putting the initializations here means we
- can not trace any of the routines.
-
- On the other hand, mst_init() MUST be placed here
- so we can define macros using the -d option.
-
- The call to mst2_init() must FOLLOW the gathering of
- command line arguments so that the __line__ and __file__
- macros may be disabled using the -u option.
- */
- t_init();
- mst_init();
- st_init();
- pn_init();
- gen_init();
-
- (void) syscsts();
- TICK("main");
-
- /* Always put out the sign on message. */
- printf("%s", SIGNON);
-
- /* Make first test for correct command line. */
- if (argc == 2 && str_eq(argv[1], "-?")) {
- exit(0);
- }
- else if (argc < 3) {
- printf("\n%s\n%s%s%s%s%s%s",
- USAGE, U1, U2, U3, U4, U5, U6);
- printf("%s%s%s%s%s%s%s",
- U7, U8, U9, U10, U11, U12, U13);
- exit(0);
- }
-
- /* No search paths active yet. */
- n_paths = 0;
-
- /* Indicate that no file arguments have been seen yet. */
- in = NULL;
- out = NULL;
-
- /* Options that don't [yet] have proper option flags */
- TRACE("_noassoc", array_opt = 1;);
- TRACE("local", no_local = 0;);
-
- /* Process all the arguments on the command line. */
- argc--;
- argv++;
- while (argc-- > 0) {
- arg = *argv++;
-
- if (str_eq(arg, "-code1")) {
- code1_flag = TRUE;
- }
- else if (str_eq(arg, "-code2")) {
- code2_flag = TRUE;
- }
- else if(str_eq(arg, "-d")) {
- /* Define a variable. */
- if (argc--) {
- arg = *argv++;
- /* Scan for an optional equal sign. */
- for (def = arg; *def; def++) {
- if (*def == '=') {
- *def = '\0';
- def++;
- break;
- }
- }
- (void) mst_enter(arg, def, -1);
- }
- else {
- printf("Trailing -d\n");
- exit(0);
- }
- }
- else if (str_eq(arg, "-f")) {
- /* Full C option. */
- full_c = TRUE;
- }
- else if (str_eq(arg, "-n")) {
- /* Allow nested comments. */
- nest_flag = TRUE;
- }
- else if (str_eq(arg, "-nogen")) {
- nogen_flag = TRUE;
- }
- else if (str_eq(arg, "-nomacro")) {
- nomacro_flag = TRUE;
- }
- else if (str_eq(arg, "-noparse")) {
- noparse_flag = TRUE;
- }
- else if (str_eq(arg, "-nopeep")) {
- nopeep_flag = TRUE;
- }
- else if (str_eq(arg, "-s")) {
- /* Define a path. */
- if (argc--) {
- arg = *argv++;
- if (n_paths >= MAX_PATHS) {
- printf("too many path names.\n");
- exit(0);
- }
- else {
- p1 = mg_alloc(strlen(arg)+2);
- str_cpy(p1, arg);
- if (arg[strlen(arg)-1] != '\\') {
- str_cat(p1, "\\");
- }
- paths [n_paths++] = p1;
- }
- }
- else {
- printf("Trailing -s.\n");
- exit(0);
- }
- }
- else if (str_eq(arg, "-tokens")) {
- token_flag = TRUE;
- }
- else if (str_eq(arg, "-tree")) {
- tree_flag = TRUE;
- }
- else if(str_eq(arg, "-u")) {
- /* Suppress the initial definition of a variable. */
- if (argc--) {
- arg = *argv++;
- /* Put new u_node on global list. */
-
- p = CAST(struct u_node *)
- mg_alloc(sizeof(struct u_node));
- p -> u_name = arg;
- p -> u_next = undef_list . u_next;
- undef_list . u_next = p;
- }
- else {
- printf("Trailing -u.\n");
- exit(0);
- }
- }
- else if (str_eq(arg, "-?")) {
- /* Ignore it. */
- }
- else if (in == NULL) {
- in = arg;
- }
- else if (out == NULL) {
- out = arg;
- }
- else {
- printf("Extra file argument: %s\n", arg);
- exit(0);
- }
- }
-
- /* Make sure that both file arguments were provided. */
- if (in == NULL) {
- printf("Missing input, output file arguments.\n");
- exit(0);
- }
- else if (out == NULL) {
- printf("Missing output file argument.\n");
- exit(0);
- }
-
- /* Open the input file. */
- if (sysopen(in) == FALSE) {
- printf("Can not open %s\n", in);
- exit(0);
- }
-
- /* Open the output file. */
- if (syscreat(out) == FALSE) {
- printf("Can not open %s\n", out);
- sysabort();
- }
-
- /*
- Initialize the predefined macros (__line__ and __file__) here
- so that they can be suppressed with the -u command line option.
- */
- mst2_init();
- /*
- Call regs_init() here so tracing +init_1, etc. will work.
- */
- regs_init();
-
- /* Start off at a new line. */
- begin_line(TRUE);
-
- if (noparse_flag) {
- out_length = 0;
- do {
- get_token();
- if (token_flag) {
- out_length += strlen(ps_tok(t_type))+1;
- if (out_length >= 70) {
- sysnlput();
- out_length = 0;
- }
- syssput(ps_tok(t_type));
- syscput(' ');
- }
- }
- while (t_type != EOP_TOK);
- }
- else {
- /* Parse the program !! */
- program();
- }
-
- /* Close the output file. */
- sysoclose();
-
- TRACE("dump", SL_DUMP());
- TRACE("stat", mm_stat());
-
- sysend();
- }
-
- /*
- Ready the lex for execution.
- */
- static void
- t_init()
- {
- TICK("t_init");
-
- t_iflevel = 0;
- t_errcount = 0;
- }
-
- /*
- Return the next token from the input file. Set global variables
- (see cpp.h for a list) describing the token and the current file.
- */
-
- #ifdef SHERLOCK
- char bug_s1 [] = "get_token";
- char bug_s2 [] = "t_type %d, %s\n";
- #endif
-
- /* CAUTION: evaluate value only once! */
- #define T_RETURN(value)\
- t_type = value;\
- TRACEP(bug_s1, printf(bug_s2, t_type, ps_tok(t_type)));\
- return;
-
- void
- get_token()
- {
- unsigned char mesgbuf [40];
- unsigned char cbuf [2];
-
- register struct mst_node * p;
- struct mst_node * mst_lookup();
-
- TICK("get_token");
- *t_symbol = '\0';
-
- /*
- This is one of those situations where a 'goto' statement
- probably makes the structure of a program clearer, not more
- obscure.
-
- We branch to the 'rescan' label whenever a construct is seen
- that does not result directly in a token being returned, i.e.,
- for macros, PP directives and whitespace.
- */
-
- rescan: switch (ch) {
-
- TICK("get_token1");
-
- case '\r':
- sysnext();
- goto rescan;
-
- case ' ':
- case '\t':
- TICK("get_token_ws");
-
- sysnext();
- goto rescan;
-
- case '\n':
- TICK("get_token_nl");
-
- /* Allow user to abort here. */
- (void) syscsts();
-
- sysnext();
- do_nl();
- begin_line(TRUE);
- goto rescan;
-
- case '#':
- t_error("unexpected # ignored");
- sysnext();
- goto rescan;
-
- case 'a': case 'b': case 'c': case 'd':
- case 'e': case 'f': case 'g': case 'h':
- case 'i': case 'j': case 'k': case 'l':
- case 'm': case 'n': case 'o': case 'p':
- case 'q': case 'r': case 's': case 't':
- case 'u': case 'v': case 'w': case 'x':
- case 'y': case 'z':
- case '_':
- case 'A': case 'B': case 'C': case 'D':
- case 'E': case 'F': case 'G': case 'H':
- case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'O': case 'P':
- case 'Q': case 'R': case 'S': case 'T':
- case 'U': case 'V': case 'W': case 'X':
- case 'Y': case 'Z':
-
- TICK("get_token_id");
-
- t_id(t_symbol);
- if(is_reserved(t_symbol, t_length)) {
- TRACE("get_token",
- printf("get_token: reserved: t_type %s, %d, %d\n",
- t_symbol, t_type, t_subtype));
- return;
- }
-
- if (!nomacro_flag) {
- p = mst_lookup(t_symbol);
- }
- else {
- p = NULL;
- }
-
- if (p == NULL) {
- t_subtype = 0;
- TRACE("get_token",
- printf("get_token: id: t_type %s, %d/%d\n",
- t_symbol, ID_TOK, t_subtype));
- t_type = ID_TOK;
- return;
- }
- else {
- /* Push back the replacement text. */
- pp_expand(p -> mst_nargs, p -> mst_text);
- goto rescan;
- }
-
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
-
- T_RETURN(t_number());
-
-
- case '"':
- t_string(t_symbol);
- T_RETURN(STRING_TOK);
-
- case '\'':
- t_string(t_symbol);
- t_value = (long) char_val(t_symbol);
- T_RETURN(CHAR_TOK);
-
- /*
- We must be VERY careful about exactly when we switch from one
- input file to the next. This is the place.
- */
-
- case END_FILE:
-
- /* Switch input streams. */
- sysiclose();
- if (t_inlevel == -1) {
- T_RETURN(EOP_TOK);
- }
- else {
- begin_line(TRUE);
- goto rescan;
- }
-
-
- case '=': /* = or == */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(EQUAL_TOK);
- }
- else {
- T_RETURN(ASSN_TOK);
- }
-
-
- case '+': /* + or ++ or += */
-
- sysnext();
- if (ch == '+') {
- sysnext();
- T_RETURN(INC_TOK);
- }
- else if (ch == '=') {
- sysnext();
- T_RETURN(PLUS_ASSN_TOK);
- }
- else {
- T_RETURN(PLUS_TOK);
- }
-
-
- case '-': /* - or -- or -> */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(MINUS_ASSN_TOK);
- }
- else if (ch == '-') {
- sysnext();
- T_RETURN(DEC_TOK);
- }
- else if (ch == '>') {
- sysnext();
- T_RETURN(ARROW_TOK);
- }
- else {
- T_RETURN(MINUS_TOK);
- }
-
-
- case '*': /* * or *= */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(STAR_ASSN_TOK);
- }
- else {
- T_RETURN(STAR_TOK);
- }
-
-
- case '/': /* comment or / or /= */
-
- sysnext();
- if (ch == '*') {
- sysnext();
- t_comment();
- goto rescan;
- }
- else if (ch == '=') {
- sysnext();
- T_RETURN(DIV_ASSN_TOK);
- }
- else {
- T_RETURN(DIV_TOK);
- }
-
-
- case '%': /* % or %= */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(MOD_ASSN_TOK);
- }
- else {
- T_RETURN(MOD_TOK);
- }
-
-
- case '>': /* > or >= or >> or >>= */
-
- sysnext();
- if (ch == '>') {
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(RSHIFT_ASSN_TOK);
- }
- else {
- T_RETURN(RSHIFT_TOK);
- }
- }
- else if (ch == '=') {
- sysnext();
- T_RETURN(GE_TOK);
- }
- else {
- T_RETURN(GT_TOK);
- }
-
-
- case '<': /* < or or <= or << or <<= */
-
- sysnext();
- if (ch == '<') {
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(LSHIFT_ASSN_TOK);
- }
- else {
- T_RETURN(LSHIFT_TOK);
- }
- }
- else if (ch == '=') {
- sysnext();
- T_RETURN(LE_TOK);
- }
- else {
- T_RETURN(LT_TOK);
- }
-
-
- case '!': /* ! or != */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(NE_TOK);
- }
- else {
- T_RETURN(NOT_TOK);
- }
-
-
- case '|': /* | or |= or || */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(OR_ASSN_TOK);
- }
- else if (ch == '|') {
- sysnext();
- T_RETURN(LOR_TOK);
- }
- else {
- T_RETURN(OR_TOK);
- }
-
-
- case '&': /* & or &= or && */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(AND_ASSN_TOK);
- }
- else if (ch == '&') {
- sysnext();
- T_RETURN(LAND_TOK);
- }
- else {
- T_RETURN(AND_TOK);
- }
-
-
- case '^': /* ^ or ^= */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- T_RETURN(XOR_ASSN_TOK);
- }
- else {
- T_RETURN(XOR_TOK);
- }
-
-
- case '?': sysnext(); T_RETURN(QUESTION_TOK);
- case ':': sysnext(); T_RETURN(COLON_TOK);
-
- case '~': sysnext(); T_RETURN(TILDE_TOK);
- case ',': sysnext(); T_RETURN(COMMA_TOK);
- case '(': sysnext(); T_RETURN(LPAREN_TOK);
- case ')': sysnext(); T_RETURN(RPAREN_TOK);
- case '[': sysnext(); T_RETURN(LBRACK_TOK);
- case ']': sysnext(); T_RETURN(RBRACK_TOK);
- case '{': sysnext(); T_RETURN(LCURLY_TOK);
- case '}': sysnext(); T_RETURN(RCURLY_TOK);
- case ';': sysnext(); T_RETURN(SEMICOLON_TOK);
- case '.': sysnext(); T_RETURN(DOT_TOK);
-
- default:
-
- str_cpy(mesgbuf, "character error: ");
- cbuf [0] = ch;
- cbuf [1] = '\0';
- str_cat(mesgbuf, cbuf);
- t_error(mesgbuf);
- sysnext();
- goto rescan;
- }
- }
-
- #undef T_RETURN
-
-
- /*
- Return the next token in a constant expression.
- Return NULL on end of expression.
- Return ERROR on mal-formed expression.
- */
-
- int
- con_token()
- {
- register struct mst_node * p;
- struct mst_node * mst_lookup();
-
- TICK("con_token");
-
- /*
- If you do not wish macros to be legal in #if statements,
- just replace the following with:
-
- if (isid1(ch)) return ERROR;
- */
-
- rescan1:
- if (isid1(ch)) {
- t_id(t_symbol);
- p = mst_lookup(t_symbol);
- if (p == NULL) {
- return ID_TOK;
- }
- else {
- pp_expand(p -> mst_nargs, p -> mst_text);
- goto rescan1;
- }
- }
-
- switch(ch) {
-
- case '\n':
- case END_FILE:
- case '#':
-
- /* Terminate the expression immediately. */
- return ZERO;
-
-
- case ' ':
- case '\t':
- case '\r':
-
- /* Ignore white space except for comments. */
- sysnext();
- goto rescan1;
-
-
- case '\\':
-
- /* Allow continuation lines. */
- sysnext();
- if (ch == '\n') {
- sysnext();
- goto rescan1;
- }
- else {
- return ERROR;
- }
-
-
- case '/':
-
- /* Do not allow comments. */
- sysnext();
- return (ch == '*') ? ERROR : DIV_TOK;
-
-
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
-
- (void) t_number();
- return INT_TOK; /* 1/10/86 */
-
-
- case '\'':
-
- t_string(t_symbol);
- t_value = (long) char_val(t_symbol);
- return CHAR_TOK;
-
-
- case '=': /* == */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- return EQUAL_TOK;
- }
- else {
- return ERROR;
- }
-
-
- case '!': /* != */
-
- sysnext();
- if (ch == '=') {
- sysnext();
- return NE_TOK;
- }
- else {
- return ERROR;
- }
-
-
- case '>': /* > or >= or >> */
-
- sysnext();
- if (ch == '>') {
- sysnext();
- return RSHIFT_TOK;
- }
- else if (ch == '=') {
- sysnext();
- return GE_TOK;
- }
- else {
- return GT_TOK;
- }
-
-
- case '<': /* < or <= or << */
-
- sysnext();
- if (ch == '<') {
- sysnext();
- return LSHIFT_TOK;
- }
- else if (ch == '=') {
- sysnext();
- return LE_TOK;
- }
- else {
- return LT_TOK;
- }
-
-
- case '+': sysnext(); return PLUS_TOK;
- case '-': sysnext(); return MINUS_TOK;
- case '*': sysnext(); return STAR_TOK;
- case '%': sysnext(); return MOD_TOK;
- case '|': sysnext(); return OR_TOK;
- case '&': sysnext(); return AND_TOK;
- case '~': sysnext(); return TILDE_TOK;
- case '?': sysnext(); return QUESTION_TOK;
- case ':': sysnext(); return COLON_TOK;
- case '^': sysnext(); return XOR_TOK;
-
- default: return ERROR;
- }
- }
-
-
- /*
- Do beginning of line processing.
- */
- void
- begin_line(flag)
- int flag;
- {
- TICK("begin_line");
-
- skip_bl();
- if (flag && ch == '#') {
- sysnext();
- do_pp();
- }
- }
-
- /*
- Do end of line processing.
- */
- void
- do_nl()
- {
- t_line++;
-
- TRACEP("do_nl", printf("t_line: %d\n", t_line));
- }
-